version 18.0               // version control
set processors 8           // to ensure replicability across different numbers of cores
clear all                  // clear existing data
macro drop _all            // and macros, clean slate
set seed 20220909          // set seed

local pgm  "dst-Appendix_Table_A2c_sleep_deprivation_ols"            // file name
local who  "Muzhe Yang"                                              // author
local dte  "2025-01-22"                                              // created date
local dte2 "`c(current_date)'"                                       // last run date
local tag  "`pgm'.do, created by `who' on `dte', last run on `dte2'"

capture log close
log using "code\analysis\tables\appendix\\`pgm'.txt", text replace 
display "`tag'"

**# data prep ------------------------------------------------------------------------------------

use "data_clean\dst-data04_for_estimation_within_250_miles", clear
summ dist_to_border
tab wave, missing
preserve 
keep wave TractFIPS sleep_depr
reshape wide sleep_depr, i(TractFIPS) j(wave)
compare sleep_depr1 sleep_depr2  // two waves of data are identical, all from 2018
restore
keep if wave == 2
codebook TractFIPS

*## (1) create the 9 cells, following page 215 of the following paper:
*## Giuntella, O. and F. Mazzonna (2019). "Sunset Time and the Economic Effects of Social Jetlag: Evidence from US Time Zone Borders." Journal of Health Economics 65: 210-226.

assert !missing(centroid_lat)
assert !missing(region)
gen     cell = 1 if centroid_lat > 40                        & region == "eastern and central"
replace cell = 2 if (34 < centroid_lat & centroid_lat <= 40) & region == "eastern and central"
replace cell = 3 if centroid_lat <= 34                       & region == "eastern and central"
replace cell = 4 if centroid_lat > 40                        & region == "central and mountain"
replace cell = 5 if (34 < centroid_lat & centroid_lat <= 40) & region == "central and mountain"
replace cell = 6 if centroid_lat <= 34                       & region == "central and mountain"
replace cell = 7 if centroid_lat > 40                        & region == "mountain and pacific"
replace cell = 8 if (34 < centroid_lat & centroid_lat <= 40) & region == "mountain and pacific"
replace cell = 9 if centroid_lat <= 34                       & region == "mountain and pacific"
tab cell time_zone, missing
summ centroid_lat if cell == 1 | cell == 4 | cell == 7
summ centroid_lat if cell == 2 | cell == 5 | cell == 8
summ centroid_lat if cell == 3 | cell == 6 | cell == 9

*## (2) control variables

global x "pop white_pct black_pct hispanic_pct pop_under_18yr_pct pop_65yr_over_pct age_median educ_hs_pct educ_coll_pct married_pct hh_size hh_income_median home_value_median no_health_ins_pct unemploy_pct"
global cvars    c.($x dist_to_border centroid_lat daylight_dur_max)##c.($x dist_to_border centroid_lat daylight_dur_max)
global controls $cvars i.cell i.cell#c.($x dist_to_border centroid_lat daylight_dur_max)

des  $x dist_to_border centroid_lat daylight_dur_max
summ $x dist_to_border centroid_lat daylight_dur_max

**# estimation prep -------------------------------------------------

egen nomiss = rowmiss($x dist_to_border centroid_lat daylight_dur_max)
assert !missing(StateAbbr)
local radius = 50
gen sample_selection = (nomiss == 0 & StateAbbr != "AZ" & dist_to_border <= `radius')

**# table ---------------------------------------------------------------------------------

hettreatreg $x dist_to_border daylight_dur_max centroid_lat i.cell if sample_selection == 1, outcome(sleep_depr) treatment(treat) vce(cluster CountyFIPS)
estimates store m1
scalar case_1 = "Yes"
scalar case_2 = "No"
scalar prop_1 = e(p1)
scalar prop_0 = e(p0)
scalar w1 = e(w1)
scalar w0 = e(w0)
scalar delta = e(delta)
scalar ate = e(ate)
scalar att = e(att)
scalar atu = e(atu)
scalar ols = e(ols2)  // OLS estimate of the treatment effect, obtained as e(w1)*e(att)+e(w0)*e(atu)
scalar ate_cal = e(p1)*e(att) + e(p0)*e(atu)
scalar bias = delta*(atu - att)
quietly etable, replace ///
        column(index) ///
        keep(treat) ///
        cstat(_r_b,  nformat(%9.3f)) ///
        cstat(_r_se, nformat(%9.3f)) ///
        mstat(N, nformat(%9.0fc) label("Number of observations")) ///
        mstat(ctrl_case_1 = case_1, label("Case 1: linear controls used")) ///
        mstat(ctrl_case_2 = case_2, label("Case 2: flexible controls used")) ///
        mstat(case_prop_1 = prop_1, nformat(%9.3f) label("Proportion of treated units (p1)")) ///
        mstat(case_prop_0 = prop_0, nformat(%9.3f) label("Proportion of untreated units (p0)")) ///
        mstat(case_w1 = w1, nformat(%9.3f) label("OLS weight on ATT (w1)")) ///
        mstat(case_w0 = w0, nformat(%9.3f) label("OLS weight on ATU (w0)")) ///
        mstat(case_delta = delta,     nformat(%9.3f) label("Diagnostic for interpreting OLS as ATE (delta)")) ///
        mstat(case_ate = ate,         nformat(%9.3f) label("Implicit OLS estimate of ATE")) ///
        mstat(case_att = att,         nformat(%9.3f) label("Implicit OLS estimate of ATT")) ///
        mstat(case_atu = atu,         nformat(%9.3f) label("Implicit OLS estimate of ATU")) ///
        mstat(case_ols = ols,         nformat(%9.3f) label("w1 × ATT + w0 × ATU = OLS")) ///
        mstat(case_ate_cal = ate_cal, nformat(%9.3f) label("p1 × ATT + p0 × ATU = ATE")) ///
        mstat(case_bias = bias,       nformat(%9.3f) label("OLS – ATE = delta × (ATU – ATT)")) ///
        stars(0.10 "*" 0.05 "**" 0.01 "***", attach(_r_b) decreasing pvname("p-value") nformat(%9.2f)) showstars showstarsnote ///
        novarlabel nocenter 

hettreatreg $controls if sample_selection == 1, outcome(sleep_depr) treatment(treat) vce(cluster CountyFIPS)
estimates store m2
scalar case_1 = "No"
scalar case_2 = "Yes"
scalar prop_1 = e(p1)
scalar prop_0 = e(p0)
scalar w1 = e(w1)
scalar w0 = e(w0)
scalar delta = e(delta)
scalar ate = e(ate)
scalar att = e(att)
scalar atu = e(atu)
scalar ols = e(ols2)
scalar ate_cal = e(p1)*e(att) + e(p0)*e(atu)
scalar bias = delta*(atu - att)
quietly etable, append

hettreatreg $x dist_to_border daylight_dur_max centroid_lat i.cell if sample_selection == 1 & region == "eastern and central", outcome(sleep_depr) treatment(treat) vce(cluster CountyFIPS)
estimates store m3
scalar case_1 = "Yes"
scalar case_2 = "No"
scalar prop_1 = e(p1)
scalar prop_0 = e(p0)
scalar w1 = e(w1)
scalar w0 = e(w0)
scalar delta = e(delta)
scalar ate = e(ate)
scalar att = e(att)
scalar atu = e(atu)
scalar ols = e(ols2)
scalar ate_cal = e(p1)*e(att) + e(p0)*e(atu)
scalar bias = delta*(atu - att)
quietly etable, append

hettreatreg $controls if sample_selection == 1 & region == "eastern and central", outcome(sleep_depr) treatment(treat) vce(cluster CountyFIPS)
estimates store m4
scalar case_1 = "No"
scalar case_2 = "Yes"
scalar prop_1 = e(p1)
scalar prop_0 = e(p0)
scalar w1 = e(w1)
scalar w0 = e(w0)
scalar delta = e(delta)
scalar ate = e(ate)
scalar att = e(att)
scalar atu = e(atu)
scalar ols = e(ols2)
scalar ate_cal = e(p1)*e(att) + e(p0)*e(atu)
scalar bias = delta*(atu - att)
quietly etable, append

collect title "Table: The Impact of Circadian Misalignment on Sleep Deprivation Estimated by OLS" 

collect layout 
collect addtags top_row[1]
collect recode etable_estimates 1 = column1 2 = column1 ///
                                3 = column2 4 = column2 
collect layout (colname[treat]#result[_r_b _r_se] result[N ctrl_case_1 ctrl_case_2 case_prop_1 case_prop_0 case_w1 case_w0 case_delta case_ate case_att case_atu case_ols case_ate_cal case_bias]) ///
               (top_row[1]#etable_estimates[column1 column2]#cmdset#stars)

collect label levels top_row ///
        1 "Treat = 1 for census tracts located east of the time zone border; Treat = 0 for census tracts located west of the time zone border", modify
collect label levels etable_estimates ///
        column1 "All time zones in the Contiguous United States" ///
        column2 "Eastern Time Zone and Central Time Zone", modify 
collect label levels cmdset ///
        1 "(1)" 2 "(2)" 3 "(3)" 4 "(4)", modify
collect label levels colname ///
        treat "Treat (1/0)", modify 

collect style header top_row, level(label)
collect style header etable_estimates, level(label)
collect style header cmdset, level(label)
collect style header colname, level(label)
collect style cell result[_r_b N case_prop_1 case_prop_0 case_w1 case_w0 case_delta case_ate case_att case_atu case_ols case_ate_cal case_bias], sformat(%s)
collect style cell border_block[corner], border(top, pattern(double))
collect style cell border_block[column-header], border(top, pattern(double))
collect style cell border_block[row-header], border(bottom, pattern(double))
collect style cell border_block[item], border(bottom, pattern(double))

collect export "code\analysis\tables\appendix\\`pgm'.xlsx", replace

log close
exit